#### Installing and requiring packages####
# simcf is not on CRAN, so install.packages() cannot fetch it: it has to be
# installed manually, package available here
# http://faculty.washington.edu/cadolph/?page=60
if (!requireNamespace("simcf", quietly = TRUE)) {
  stop(
    "Package 'simcf' is not installed. Install it manually from ",
    "http://faculty.washington.edu/cadolph/?page=60",
    call. = FALSE
  )
}

# CRAN dependencies of this script. "stats" is deliberately not listed: it is
# a base package that ships with R and cannot be installed from CRAN.
cran_packages <- c("plm", "pcse", "gmodels", "repmis", "gap")

# Install only what is missing, so re-running the script does not reinstall
# every package (and does not need network access once everything is present).
missing_packages <- setdiff(cran_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}


library(plm)
library(pcse)
library(simcf)
library(stats)
library(gmodels)
library(repmis)
library(gap)

####Acquiring and viewing data####
# Each data set is read through repmis::source_DropboxData() from a file name
# and its key, as a comma-separated file whose first row holds the column names.
# NOTE(review): confirm source_DropboxData() is still provided by the installed
# repmis version.

# State-level panel
replication_states <- repmis::source_DropboxData(
  "replication_states.csv",
  "bhdeskajeeiewvl",
  sep = ",",
  header = TRUE
)

# GSS data
replication_gss <- repmis::source_DropboxData(
  "replication_gss.csv",
  "nhe1tdtios1nvrk",
  sep = ",",
  header = TRUE
)

# NES data
replication_nes <- repmis::source_DropboxData(
  "replication_nes.csv",
  "xprnfbfb2x7fu79",
  sep = ",",
  header = TRUE
)

#REGRESSIONS####

#### CREATING LAGS and DIFFERENCES ####

# Variables that get a lagged column ("l.<name>") and a first-difference
# column ("dif.<name>"). All lag columns are created first, then all
# difference columns, so the column order of replication_states is unchanged.
panel_vars <- c(
  "marketgini", "postgini", "dempres", "cdprop", "natpolicy",
  "leftgovpower", "uniondense", "unemployment", "manufacturing",
  "gsp", "nonwhite", "pop65", "statemin"
)

# Lag of order 1 via lagpanel(), called with the state name and year columns
# (presumably the panel unit and time index -- see the simcf documentation).
for (panel_var in panel_vars) {
  replication_states[[paste0("l.", panel_var)]] <- lagpanel(
    replication_states[[panel_var]],
    replication_states$statename,
    replication_states$year,
    1
  )
}

# First difference: current value minus the lagged value computed above.
for (panel_var in panel_vars) {
  replication_states[[paste0("dif.", panel_var)]] <-
    replication_states[[panel_var]] -
    replication_states[[paste0("l.", panel_var)]]
}

# Drop the loop helpers so the workspace holds the same objects as before.
rm(panel_vars, panel_var)


# Subsetting and re-organising####
# Split the state panel at 1995: one data frame for years before 1995 and one
# for 1995 onwards. subset() also drops rows whose year is missing.
TEMP.before.1995 <- subset(replication_states, year < 1995)
TEMP.after.1995 <- subset(replication_states, year >= 1995)

#TABLE 2 ####
#running error correction models with OLS (main formula and descriptions on page 421 in the article)

# Market inequality (1976-1994)
# Pooled model on the pre-1995 subset: the change in market Gini is regressed
# on its own lagged level and on the first difference and lagged level of each
# explanatory variable. Rows with missing values are dropped (na.omit).
table2 <- plm(
  dif.marketgini ~ l.marketgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.before.1995,
  na.action = na.omit
)
summary(table2)

# Residual diagnostics: plot and histogram of the residuals
plot(residuals(table2))
hist(residuals(table2))

# Residuals against predicted values, with a blue lowess smoother on top
residuals2 <- summary(table2)$residuals
predicted2 <- predict(table2)
plot(predicted2, residuals2)
lines(
  lowess(
    predicted2,
    residuals2,
    f = 0.25,
    delta = 0.01 * diff(range(predicted2))
  ),
  col = "blue"
)


# Market inequality (1995-2006)
# Same specification as the 1976-1994 market inequality model, fitted on the
# subset with year >= 1995.
table2a <- plm(
  dif.marketgini ~ l.marketgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.after.1995,
  na.action = na.omit
)
summary(table2a)

# Residual diagnostics
plot(residuals(table2a))
hist(residuals(table2a))

# Post-Transfer Inequality (1976-1994)
# Pooled model on the pre-1995 subset with the change in post-transfer Gini as
# the outcome and its lagged level among the regressors.
table2b <- plm(
  dif.postgini ~ l.postgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.before.1995,
  na.action = na.omit
)
summary(table2b)

# Residual diagnostics
plot(residuals(table2b))
hist(residuals(table2b))


# Post-Transfer Inequality (1995-2006)
# Post-transfer Gini specification fitted on the subset with year >= 1995.
table2c <- plm(
  dif.postgini ~ l.postgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.after.1995,
  na.action = na.omit
)
summary(table2c)

# Residual diagnostics
plot(residuals(table2c))
hist(residuals(table2c))

#TABLE 3 ####
#running the same model just with one extra variable ("statemin")

# Market inequality (1976-1994)
# Market Gini specification extended with the statemin terms (first difference
# and lagged level), fitted on the pre-1995 subset.
table3 <- plm(
  dif.marketgini ~ l.marketgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.statemin + l.statemin +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.before.1995,
  na.action = na.omit
)
summary(table3)

# Residual diagnostics
plot(residuals(table3))
hist(residuals(table3))


# Market inequality (1995-2006)
# Market Gini specification including the statemin terms, fitted on the subset
# with year >= 1995.
table3a <- plm(
  dif.marketgini ~ l.marketgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.statemin + l.statemin +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = TEMP.after.1995,
  na.action = na.omit
)
summary(table3a)

# Residual diagnostics
plot(residuals(table3a))
hist(residuals(table3a))

## Pulling regression coefficients into a single table
# Model term -> row label, in the order the rows appear in the table (the
# intercept is left out). Coefficients are looked up by term name rather than
# by position: positional indexing into the summary coefficient matrix would
# silently pick up standard errors, or misalign rows, if a term were dropped
# from one of the models. A dropped term shows up as NA here instead.
# "\u0394" is the Greek capital delta marking a first difference; the lagged
# level of market inequality is not a difference, so it carries no delta.
table3_labels <- c(
  l.marketgini      = "Market Inequality t-1",
  dif.dempres       = "\u0394 Democratic President",
  l.dempres         = "Democratic President t-1",
  dif.cdprop        = "\u0394 Democrats in Congress",
  l.cdprop          = "Democrats in Congress t-1",
  dif.natpolicy     = "\u0394 National Policy Liberalism",
  l.natpolicy       = "National Policy Liberalism t-1",
  dif.statemin      = "\u0394 Minimum Wage",
  l.statemin        = "Minimum Wage t-1",
  dif.leftgovpower  = "\u0394 Left Govt Power",
  l.leftgovpower    = "Left Govt Power t-1",
  dif.uniondense    = "\u0394 Union Density ",
  l.uniondense      = " Union Density t-1",
  dif.unemployment  = "\u0394 Unemployment",
  l.unemployment    = "Unemployment t-1",
  dif.manufacturing = "\u0394 Manufacturing",
  l.manufacturing   = "Manufacturing t-1",
  dif.gsp           = "\u0394 GSP",
  l.gsp             = "GSP t-1",
  dif.nonwhite      = "\u0394 Nonwhite Population",
  l.nonwhite        = "Nonwhite Population t-1",
  dif.pop65         = "\u0394 Elderly Population",
  l.pop65           = "Elderly Population t-1"
)

# One column of coefficient estimates per period.
Table_three <- cbind(
  coef(table3)[names(table3_labels)],
  coef(table3a)[names(table3_labels)]
)
colnames(Table_three) <- c("1976-1994", "1995-2006")
rownames(Table_three) <- unname(table3_labels)
Table_three <- as.table(Table_three)
Table_three

#### REGRESSIONS for the entire 1976-2006 period ####
# Alternative to assuming a structural break in 1995: fit the same two
# specifications (which include the congress variable) on the full panel,
# i.e. the entire research period.

# Market inequality, all years
table4 <- plm(
  dif.marketgini ~ l.marketgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = replication_states,
  na.action = na.omit
)
summary(table4)

# Post-transfer inequality, all years
table4a <- plm(
  dif.postgini ~ l.postgini +
    dif.dempres + l.dempres +
    dif.cdprop + l.cdprop +
    dif.natpolicy + l.natpolicy +
    dif.leftgovpower + l.leftgovpower +
    dif.uniondense + l.uniondense +
    dif.unemployment + l.unemployment +
    dif.manufacturing + l.manufacturing +
    dif.gsp + l.gsp +
    dif.nonwhite + l.nonwhite +
    dif.pop65 + l.pop65,
  model = "pooling",
  data = replication_states,
  na.action = na.omit
)
summary(table4a)

# Pulling regression coefficients
# Terms shared by both full-period models -> row label, in table order.
# Coefficients are looked up by term name rather than by position, so a term
# dropped from a model yields NA instead of misaligned rows or standard
# errors. "\u0394" is the Greek capital delta marking a first difference.
table4_shared_labels <- c(
  dif.dempres       = "\u0394 Democratic President",
  l.dempres         = "Democratic President t-1",
  dif.cdprop        = "\u0394 Democrats in Congress",
  l.cdprop          = "Democrats in Congress t-1",
  dif.natpolicy     = "\u0394 National Policy Liberalism",
  l.natpolicy       = "National Policy Liberalism t-1",
  dif.leftgovpower  = "\u0394 Left Govt Power",
  l.leftgovpower    = "Left Govt Power t-1",
  dif.uniondense    = "\u0394 Union Density ",
  l.uniondense      = " Union Density t-1",
  dif.unemployment  = "\u0394 Unemployment",
  l.unemployment    = "Unemployment t-1",
  dif.manufacturing = "\u0394 Manufacturing",
  l.manufacturing   = "Manufacturing t-1",
  dif.gsp           = "\u0394 GSP",
  l.gsp             = "GSP t-1",
  dif.nonwhite      = "\u0394 Nonwhite Population",
  l.nonwhite        = "Nonwhite Population t-1",
  dif.pop65         = "\u0394 Elderly Population",
  l.pop65           = "Elderly Population t-1"
)

# The first row is the lagged level of each model's own inequality measure:
# l.marketgini in the market model and l.postgini in the post-transfer model.
Table_four <- cbind(
  coef(table4)[c("l.marketgini", names(table4_shared_labels))],
  coef(table4a)[c("l.postgini", names(table4_shared_labels))]
)
colnames(Table_four) <- c(
  "\u0394 Market Inequality",
  "\u0394 Post-Transfer Inequality"
)
rownames(Table_four) <- c("Inequality t-1", unname(table4_shared_labels))
Table_four <- as.table(Table_four)
Table_four

# Save the printed table. capture.output() opens and closes the file around
# the print call, so no later console output is diverted into it.
# NOTE(review): the path is kept exactly as it was; "~output/..." looks like
# it may be missing a slash ("~/output/...") -- confirm the intended folder.
table4_file <- "~output/Table4.out"
capture.output(print(Table_four), file = table4_file)




